1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 package sun.text.normalizer;
38
39 import java.io.BufferedInputStream;
40 import java.io.ByteArrayInputStream;
41 import java.io.IOException;
42 import java.io.BufferedInputStream;
43 import java.io.InputStream;
44
45
46
47
48 public final class NormalizerImpl {
49
/**
 * Singleton instance. Creating it runs the private constructor, which
 * loads the normalization data into the static tables of this class.
 */
static final NormalizerImpl IMPL;

static {
    try {
        IMPL = new NormalizerImpl();
    } catch (Exception e) {
        // Keep the original exception as the cause; passing only the message
        // would discard its type and stack trace.
        throw new RuntimeException(e.getMessage(), e);
    }
}
63
64 static final int UNSIGNED_BYTE_MASK =0xFF;
65 static final long UNSIGNED_INT_MASK = 0xffffffffL;
66
67
68
69
70
71 private static final String DATA_FILE_NAME = "/sun/text/resources/unorm.icu";
72
73
74
75
76 public static final int QC_NFC=0x11;
77 public static final int QC_NFKC=0x22;
78 public static final int QC_NFD=4;
79 public static final int QC_NFKD=8;
80
81 public static final int QC_ANY_NO=0xf;
82
83
84
85
86 public static final int QC_MAYBE=0x10;
87 public static final int QC_ANY_MAYBE=0x30;
88
89 public static final int QC_MASK=0x3f;
90
91 private static final int COMBINES_FWD=0x40;
92 private static final int COMBINES_BACK=0x80;
93 public static final int COMBINES_ANY=0xc0;
94
95 private static final int CC_SHIFT=8;
96 public static final int CC_MASK=0xff00;
97
98 private static final int EXTRA_SHIFT=16;
99
100
101 private static final long MIN_SPECIAL = (long)(0xfc000000 & UNSIGNED_INT_MASK);
102 private static final long SURROGATES_TOP = (long)(0xfff00000 & UNSIGNED_INT_MASK);
103 private static final long MIN_HANGUL = (long)(0xfff00000 & UNSIGNED_INT_MASK);
104
105 private static final long JAMO_V_TOP = (long)(0xfff30000 & UNSIGNED_INT_MASK);
106
107
108
109
110 static final int INDEX_TRIE_SIZE = 0;
111
112 static final int INDEX_CHAR_COUNT = 1;
113
114 static final int INDEX_COMBINE_DATA_COUNT = 2;
115
116 public static final int INDEX_MIN_NFC_NO_MAYBE = 6;
117
118 public static final int INDEX_MIN_NFKC_NO_MAYBE = 7;
119
120 public static final int INDEX_MIN_NFD_NO_MAYBE = 8;
121
122 public static final int INDEX_MIN_NFKD_NO_MAYBE = 9;
123
124 static final int INDEX_FCD_TRIE_SIZE = 10;
125
126 static final int INDEX_AUX_TRIE_SIZE = 11;
127
128 static final int INDEX_TOP = 32;
129
130
131
132
133 private static final int AUX_UNSAFE_SHIFT = 11;
134 private static final int AUX_COMP_EX_SHIFT = 10;
135 private static final int AUX_NFC_SKIPPABLE_F_SHIFT = 12;
136
137 private static final int AUX_MAX_FNC = ((int)1<<AUX_COMP_EX_SHIFT);
138 private static final int AUX_UNSAFE_MASK = (int)((1<<AUX_UNSAFE_SHIFT) & UNSIGNED_INT_MASK);
139 private static final int AUX_FNC_MASK = (int)((AUX_MAX_FNC-1) & UNSIGNED_INT_MASK);
140 private static final int AUX_COMP_EX_MASK = (int)((1<<AUX_COMP_EX_SHIFT) & UNSIGNED_INT_MASK);
141 private static final long AUX_NFC_SKIP_F_MASK = ((UNSIGNED_INT_MASK&1)<<AUX_NFC_SKIPPABLE_F_SHIFT);
142
143 private static final int MAX_BUFFER_SIZE = 20;
144
145
146
147
148 static final class NormTrieImpl implements Trie.DataManipulate{
149 static IntTrie normTrie= null;
150
151
152
153
154
155
156
157
158 public int getFoldingOffset(int value){
159 return BMP_INDEX_LENGTH+
160 ((value>>(EXTRA_SHIFT-SURROGATE_BLOCK_BITS))&
161 (0x3ff<<SURROGATE_BLOCK_BITS));
162 }
163
164 }
165 static final class FCDTrieImpl implements Trie.DataManipulate{
166 static CharTrie fcdTrie=null;
167
168
169
170
171
172
173
174
175 public int getFoldingOffset(int value){
176 return value;
177 }
178 }
179
180 static final class AuxTrieImpl implements Trie.DataManipulate{
181 static CharTrie auxTrie = null;
182
183
184
185
186
187
188
189
190 public int getFoldingOffset(int value){
191 return (int)(value &AUX_FNC_MASK)<<SURROGATE_BLOCK_BITS;
192 }
193 }
194
195
196
197
198 private static FCDTrieImpl fcdTrieImpl;
199 private static NormTrieImpl normTrieImpl;
200 private static AuxTrieImpl auxTrieImpl;
201 private static int[] indexes;
202 private static char[] combiningTable;
203 private static char[] extraData;
204
205 private static boolean isDataLoaded;
206 private static boolean isFormatVersion_2_1;
207 private static boolean isFormatVersion_2_2;
208 private static byte[] unicodeVersion;
209
210
211
212
213 private static final int DATA_BUFFER_SIZE = 25000;
214
215
216
217
218
219 public static final int MIN_WITH_LEAD_CC=0x300;
220
221
222
223
224
225
226
227
228
229 private static final int DECOMP_FLAG_LENGTH_HAS_CC=0x80;
230
231
232
233 private static final int DECOMP_LENGTH_MASK=0x7f;
234
235
236 private static final int BMP_INDEX_LENGTH=0x10000>>Trie.INDEX_STAGE_1_SHIFT_;
237
238
239
240 private static final int SURROGATE_BLOCK_BITS=10-Trie.INDEX_STAGE_1_SHIFT_;
241
242
243
244 public static int getFromIndexesArr(int index){
245 return indexes[index];
246 }
247
248
249
250
251
252
253
254 private NormalizerImpl() throws IOException {
255
256 if(!isDataLoaded){
257
258
259 InputStream i = ICUData.getRequiredStream(DATA_FILE_NAME);
260 BufferedInputStream b = new BufferedInputStream(i,DATA_BUFFER_SIZE);
261 NormalizerDataReader reader = new NormalizerDataReader(b);
262
263
264 indexes = reader.readIndexes(NormalizerImpl.INDEX_TOP);
265
266 byte[] normBytes = new byte[indexes[NormalizerImpl.INDEX_TRIE_SIZE]];
267
268 int combiningTableTop = indexes[NormalizerImpl.INDEX_COMBINE_DATA_COUNT];
269 combiningTable = new char[combiningTableTop];
270
271 int extraDataTop = indexes[NormalizerImpl.INDEX_CHAR_COUNT];
272 extraData = new char[extraDataTop];
273
274 byte[] fcdBytes = new byte[indexes[NormalizerImpl.INDEX_FCD_TRIE_SIZE]];
275 byte[] auxBytes = new byte[indexes[NormalizerImpl.INDEX_AUX_TRIE_SIZE]];
276
277 fcdTrieImpl = new FCDTrieImpl();
278 normTrieImpl = new NormTrieImpl();
279 auxTrieImpl = new AuxTrieImpl();
280
281
282 reader.read(normBytes, fcdBytes,auxBytes, extraData, combiningTable);
283
284 NormTrieImpl.normTrie = new IntTrie( new ByteArrayInputStream(normBytes),normTrieImpl );
285 FCDTrieImpl.fcdTrie = new CharTrie( new ByteArrayInputStream(fcdBytes),fcdTrieImpl );
286 AuxTrieImpl.auxTrie = new CharTrie( new ByteArrayInputStream(auxBytes),auxTrieImpl );
287
288
289
290 isDataLoaded = true;
291
292
293 byte[] formatVersion = reader.getDataFormatVersion();
294
295 isFormatVersion_2_1 =( formatVersion[0]>2
296 ||
297 (formatVersion[0]==2 && formatVersion[1]>=1)
298 );
299 isFormatVersion_2_2 =( formatVersion[0]>2
300 ||
301 (formatVersion[0]==2 && formatVersion[1]>=2)
302 );
303 unicodeVersion = reader.getUnicodeVersion();
304 b.close();
305 }
306 }
307
308
309
310
311
312 public static final int JAMO_L_BASE=0x1100;
313 public static final int JAMO_V_BASE=0x1161;
314 public static final int JAMO_T_BASE=0x11a7;
315
316 public static final int HANGUL_BASE=0xac00;
317
318 public static final int JAMO_L_COUNT=19;
319 public static final int JAMO_V_COUNT=21;
320 public static final int JAMO_T_COUNT=28;
321 public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
322
323 private static boolean isHangulWithoutJamoT(char c) {
324 c-=HANGUL_BASE;
325 return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
326 }
327
328
329
330
331 private static boolean isNorm32Regular(long norm32) {
332 return norm32<MIN_SPECIAL;
333 }
334
335
336 private static boolean isNorm32LeadSurrogate(long norm32) {
337 return MIN_SPECIAL<=norm32 && norm32<SURROGATES_TOP;
338 }
339
340
341 private static boolean isNorm32HangulOrJamo(long norm32) {
342 return norm32>=MIN_HANGUL;
343 }
344
345
346
347
348
349 private static boolean isJamoVTNorm32JamoV(long norm32) {
350 return norm32<JAMO_V_TOP;
351 }
352
353
354
355 public static long getNorm32(char c) {
356 return ((UNSIGNED_INT_MASK) & (NormTrieImpl.normTrie.getLeadValue(c)));
357 }
358
359 public static long getNorm32FromSurrogatePair(long norm32,
360 char c2) {
361
362
363
364
365 return ((UNSIGNED_INT_MASK) &
366 NormTrieImpl.normTrie.getTrailValue((int)norm32, c2));
367 }
368
369 private static long getNorm32(int c){
370 return (UNSIGNED_INT_MASK&(NormTrieImpl.normTrie.getCodePointValue(c)));
371 }
372
373
374
375
376
377 private static long getNorm32(char[] p,int start,
378 int mask) {
379 long norm32= getNorm32(p[start]);
380 if(((norm32&mask)>0) && isNorm32LeadSurrogate(norm32)) {
381
382 norm32=getNorm32FromSurrogatePair(norm32, p[start+1]);
383 }
384 return norm32;
385 }
386
387
388 public static VersionInfo getUnicodeVersion(){
389 return VersionInfo.getInstance(unicodeVersion[0], unicodeVersion[1],
390 unicodeVersion[2], unicodeVersion[3]);
391 }
392
393 public static char getFCD16(char c) {
394 return FCDTrieImpl.fcdTrie.getLeadValue(c);
395 }
396
397 public static char getFCD16FromSurrogatePair(char fcd16, char c2) {
398
399
400
401 return FCDTrieImpl.fcdTrie.getTrailValue(fcd16, c2);
402 }
403 public static int getFCD16(int c) {
404 return FCDTrieImpl.fcdTrie.getCodePointValue(c);
405 }
406
407 private static int getExtraDataIndex(long norm32) {
408 return (int)(norm32>>EXTRA_SHIFT);
409 }
410
411 private static final class DecomposeArgs{
412 int cc;
413 int trailCC;
414 int length;
415 }
416
417
418
419
420
421
422 private static int decompose(long norm32,
423 int qcMask,
424 DecomposeArgs args) {
425 int p= getExtraDataIndex(norm32);
426 args.length=extraData[p++];
427
428 if((norm32&qcMask&QC_NFKD)!=0 && args.length>=0x100) {
429
430 p+=((args.length>>7)&1)+(args.length&DECOMP_LENGTH_MASK);
431 args.length>>=8;
432 }
433
434 if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) {
435
436 char bothCCs=extraData[p++];
437 args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8);
438 args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs;
439 } else {
440
441 args.cc=args.trailCC=0;
442 }
443
444 args.length&=DECOMP_LENGTH_MASK;
445 return p;
446 }
447
448
449
450
451
452
453 private static int decompose(long norm32,
454 DecomposeArgs args) {
455
456 int p= getExtraDataIndex(norm32);
457 args.length=extraData[p++];
458
459 if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) {
460
461 char bothCCs=extraData[p++];
462 args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8);
463 args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs;
464 } else {
465
466 args.cc=args.trailCC=0;
467 }
468
469 args.length&=DECOMP_LENGTH_MASK;
470 return p;
471 }
472
473
474 private static final class NextCCArgs{
475 char[] source;
476 int next;
477 int limit;
478 char c;
479 char c2;
480 }
481
482
483
484
485
486
487 private static int getNextCC(NextCCArgs args) {
488 long norm32;
489
490 args.c=args.source[args.next++];
491
492 norm32= getNorm32(args.c);
493 if((norm32 & CC_MASK)==0) {
494 args.c2=0;
495 return 0;
496 } else {
497 if(!isNorm32LeadSurrogate(norm32)) {
498 args.c2=0;
499 } else {
500
501 if(args.next!=args.limit &&
502 UTF16.isTrailSurrogate(args.c2=args.source[args.next])){
503 ++args.next;
504 norm32=getNorm32FromSurrogatePair(norm32, args.c2);
505 } else {
506 args.c2=0;
507 return 0;
508 }
509 }
510
511 return (int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT));
512 }
513 }
514
515 private static final class PrevArgs{
516 char[] src;
517 int start;
518 int current;
519 char c;
520 char c2;
521 }
522
523
524
525
526
527
528
529 private static long getPrevNorm32(PrevArgs args,
530 int minC,
531 int mask) {
532 long norm32;
533
534 args.c=args.src[--args.current];
535 args.c2=0;
536
537
538
539
540 if(args.c<minC) {
541 return 0;
542 } else if(!UTF16.isSurrogate(args.c)) {
543 return getNorm32(args.c);
544 } else if(UTF16.isLeadSurrogate(args.c)) {
545
546 return 0;
547 } else if(args.current!=args.start &&
548 UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) {
549 --args.current;
550 norm32=getNorm32(args.c2);
551
552 if((norm32&mask)==0) {
553
554
555
556 return 0;
557 } else {
558
559 return getNorm32FromSurrogatePair(norm32, args.c);
560 }
561 } else {
562
563 args.c2=0;
564 return 0;
565 }
566 }
567
568
569
570
571
572 private static int getPrevCC(PrevArgs args) {
573
574 return (int)((UNSIGNED_BYTE_MASK)&(getPrevNorm32(args, MIN_WITH_LEAD_CC,
575 CC_MASK)>>CC_SHIFT));
576 }
577
578
579
580
581
582 public static boolean isNFDSafe(long norm32,
583 intccOrQCMask,
584 int decompQCMask) {
585 if((norm32&ccOrQCMask)==0) {
586 return true;
587 }
588
589
590 if(isNorm32Regular(norm32) && (norm32&decompQCMask)!=0) {
591 DecomposeArgs args=new DecomposeArgs();
592
593 decompose(norm32, decompQCMask, args);
594 return args.cc==0;
595 } else {
596
597 return (norm32&CC_MASK)==0;
598 }
599 }
600
601
602
603
604
605 public static boolean isTrueStarter(long norm32,
606 int ccOrQCMask,
607 int decompQCMask) {
608 if((norm32&ccOrQCMask)==0) {
609 return true;
610 }
611
612
613 if((norm32&decompQCMask)!=0) {
614 int p;
615 DecomposeArgs args=new DecomposeArgs();
616
617 p=decompose(norm32, decompQCMask, args);
618
619 if(args.cc==0) {
620 int qcMask=ccOrQCMask&QC_MASK;
621
622
623 if((getNorm32(extraData,p, qcMask)&qcMask)==0) {
624
625 return true;
626 }
627 }
628 }
629 return false;
630 }
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
/**
 * Inserts the character (c, c2) with combining class cc into the text that
 * ends at {@code current}, keeping combining classes in non-decreasing order.
 * When the preceding character has a higher combining class, the method walks
 * backwards to find the insertion point and shifts the characters after it up
 * so that they end at index {@code p}.
 *
 * @param source  buffer that is modified in place
 * @param start   lowest index the backward scan may reach
 * @param current index just past the existing text
 * @param p       index just past the text after insertion
 * @param c       first code unit to insert
 * @param c2      second code unit to insert, or 0 if there is none
 * @param cc      combining class of the inserted character
 * @return cc if the character stayed at the end, otherwise the combining class
 *         of the character that preceded {@code current} on entry
 */
private static int insertOrdered(char[] source,
                                 int start,
                                 int current, int p,
                                 char c, char c2,
                                 int cc) {
    int back, preBack;
    int r;
    int prevCC, trailCC=cc;

    if(start<current && cc!=0) {
        // look at the character just before the insertion candidate
        preBack=back=current;
        PrevArgs prevArgs = new PrevArgs();
        prevArgs.current = current;
        prevArgs.start = start;
        prevArgs.src = source;

        prevCC=getPrevCC(prevArgs);
        preBack = prevArgs.current;

        if(cc<prevCC) {
            // the new character must move in front of at least one character
            trailCC=prevCC;
            back=preBack;
            while(start<preBack) {
                prevCC=getPrevCC(prevArgs);
                preBack=prevArgs.current;
                if(cc>=prevCC) {
                    break;
                }
                back=preBack;
            }

            // shift [back, current) up so that it ends at p;
            // the loop leaves current == back, the insertion index
            r=p;
            do {
                source[--r]=source[--current];
            } while(back!=current);
        }
    }

    // write the character at the insertion index
    source[current]=c;
    if(c2!=0) {
        source[(current+1)]=c2;
    }

    return trailCC;
}
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
/**
 * Merges the characters of {@code data[next..limit)} into the text
 * {@code source[start..current)}, placing each character with a non-zero
 * combining class through {@code insertOrdered}.
 *
 * @param source    destination buffer, modified in place
 * @param start     lowest index insertOrdered may scan back to
 * @param current   index just past the existing destination text
 * @param data      buffer holding the characters to merge
 * @param next      index of the first character to merge
 * @param limit     index just past the last character to merge
 * @param isOrdered when true, the per-character loop stops at the first
 *                  character with combining class 0 and the rest is appended
 *                  as is; when false, such a character resets {@code start}
 *                  and the loop continues
 * @return the trailing combining class of the merged text
 */
private static int mergeOrdered(char[] source,
                                int start,
                                int current,
                                char[] data,
                                int next,
                                int limit,
                                boolean isOrdered) {
    int r;
    int cc, trailCC=0;
    boolean adjacent;

    // true when the merged text already sits directly behind the destination text
    adjacent= current==next;
    NextCCArgs ncArgs = new NextCCArgs();
    ncArgs.source = data;
    ncArgs.next = next;
    ncArgs.limit = limit;

    if(start!=current || !isOrdered) {

        while(ncArgs.next<ncArgs.limit) {
            cc=getNextCC(ncArgs);
            if(cc==0) {
                // a character with cc==0 is never moved backwards
                trailCC=0;
                if(adjacent) {
                    current=ncArgs.next;
                } else {
                    // NOTE(review): this writes into data, not source - confirm this is intended
                    data[current++]=ncArgs.c;
                    if(ncArgs.c2!=0) {
                        data[current++]=ncArgs.c2;
                    }
                }
                if(isOrdered) {
                    break;
                } else {
                    start=current;
                }
            } else {
                // r is the end of the text once this character has been inserted
                r=current+(ncArgs.c2==0 ? 1 : 2);
                trailCC=insertOrdered(source,start, current, r,
                                      ncArgs.c, ncArgs.c2, cc);
                current=r;
            }
        }
    }

    if(ncArgs.next==ncArgs.limit) {
        // everything was consumed by the loop above
        return trailCC;
    } else {
        if(!adjacent) {
            // append the remaining characters unchanged
            do {
                source[current++]=data[ncArgs.next++];
            } while(ncArgs.next!=ncArgs.limit);
            ncArgs.limit=current;
        }
        // the trailing cc is that of the last character of the result
        PrevArgs prevArgs = new PrevArgs();
        prevArgs.src = data;
        prevArgs.start = start;
        prevArgs.current = ncArgs.limit;
        return getPrevCC(prevArgs);
    }

}
795 private static int mergeOrdered(char[] source,
796 int start,
797 int current,
798 char[] data,
799 final int next,
800 final int limit) {
801 return mergeOrdered(source,start,current,data,next,limit,true);
802 }
803
/**
 * Checks whether {@code src[srcStart..srcLimit)} is already in the
 * normalization form described by {@code qcMask}.
 *
 * @param src        text to check
 * @param srcStart   index of the first code unit
 * @param srcLimit   index just past the last code unit
 * @param minNoMaybe code units below this value are skipped without a lookup
 * @param qcMask     quick-check bits of the target form
 * @param options    options forwarded to composePart
 * @param allowMaybe when true, inconclusive characters yield MAYBE; when
 *                   false, the surrounding span is composed and compared to
 *                   settle the answer
 * @param nx         set of excluded characters, forwarded to nx_contains
 * @return YES, NO or MAYBE; MAYBE is also returned when the data is not loaded
 */
public static NormalizerBase.QuickCheckResult quickCheck(char[] src,
                                                         int srcStart,
                                                         int srcLimit,
                                                         int minNoMaybe,
                                                         int qcMask,
                                                         int options,
                                                         boolean allowMaybe,
                                                         UnicodeSet nx){

    int ccOrQCMask;
    long norm32;
    char c, c2;
    char cc, prevCC;
    long qcNorm32;
    NormalizerBase.QuickCheckResult result;
    ComposePartArgs args = new ComposePartArgs();
    char[] buffer ;
    // remember the original start as the lower bound for findPreviousStarter
    int start = srcStart;

    if(!isDataLoaded) {
        return NormalizerBase.MAYBE;
    }

    ccOrQCMask=CC_MASK|qcMask;
    result=NormalizerBase.YES;
    prevCC=0;

    for(;;) {
        // skip characters that have neither a combining class nor quick-check bits
        for(;;) {
            if(srcStart==srcLimit) {
                return result;
            } else if((c=src[srcStart++])>=minNoMaybe &&
                      (( norm32=getNorm32(c)) & ccOrQCMask)!=0) {
                break;
            }
            prevCC=0;
        }

        // resolve a surrogate pair; an unpaired lead surrogate counts as inert
        if(isNorm32LeadSurrogate(norm32)) {

            if(srcStart!=srcLimit&& UTF16.isTrailSurrogate(c2=src[srcStart])) {
                ++srcStart;
                norm32=getNorm32FromSurrogatePair(norm32,c2);
            } else {
                norm32=0;
                c2=0;
            }
        }else{
            c2=0;
        }
        if(nx_contains(nx, c, c2)) {
            // excluded characters are treated as having no properties
            norm32=0;
        }

        // combining classes must not decrease within a sequence
        cc=(char)((norm32>>CC_SHIFT)&0xFF);
        if(cc!=0 && cc<prevCC) {
            return NormalizerBase.NO;
        }
        prevCC=cc;

        // evaluate the quick-check bits of this character
        qcNorm32 = norm32 & qcMask;
        if((qcNorm32& QC_ANY_NO)>=1) {
            result= NormalizerBase.NO;
            break;
        } else if(qcNorm32!=0) {
            // only "maybe" bits are set
            if(allowMaybe){
                result=NormalizerBase.MAYBE;
            }else{
                // settle the question by composing the span around this
                // character and comparing it with the original text
                int prevStarter;
                int decompQCMask;

                // shift the composition bits down to the matching decomposition bits
                decompQCMask=(qcMask<<2)&0xf;

                // back up to the start of the current character, then to the previous starter
                prevStarter=srcStart-1;
                if(UTF16.isTrailSurrogate(src[prevStarter])) {

                    --prevStarter;
                }
                prevStarter=findPreviousStarter(src, start, prevStarter,
                                                ccOrQCMask, decompQCMask,
                                                (char)minNoMaybe);

                // extend the span forward to the next starter
                srcStart=findNextStarter(src,srcStart, srcLimit, qcMask,
                                         decompQCMask,(char) minNoMaybe);

                args.prevCC = prevCC;

                // decompose and recompose [prevStarter, srcStart)
                buffer = composePart(args,prevStarter,src,srcStart,srcLimit,options,nx);

                // any difference means the text was not normalized
                if(0!=strCompare(buffer,0,args.length,src,prevStarter,srcStart, false)) {
                    result=NormalizerBase.NO;
                    break;
                }

                // the span is normalized; continue after it
            }
        }
    }
    return result;
}
921
922
923
924
925
926
/**
 * Decomposes {@code src[srcStart..srcLimit)} into {@code dest}, reordering
 * combining marks by combining class as they are written.
 *
 * Output is only written while it fits below {@code destLimit}; the length is
 * counted regardless, so the return value may exceed the available capacity.
 *
 * @param src        source text
 * @param srcStart   index of the first source code unit
 * @param srcLimit   index just past the last source code unit
 * @param dest       destination buffer
 * @param destStart  index at which output begins
 * @param destLimit  index just past the usable part of {@code dest}
 * @param compat     true selects the NFKD thresholds and mask, false the NFD ones
 * @param outTrailCC element 0 receives the combining class state left after
 *                   the last character processed
 * @param nx         set of excluded characters, forwarded to nx_contains
 * @return the number of code units the full decomposition requires
 */
public static int decompose(char[] src,int srcStart,int srcLimit,
                            char[] dest,int destStart,int destLimit,
                            boolean compat,int[] outTrailCC,
                            UnicodeSet nx) {

    // scratch space for an algorithmic Hangul decomposition (at most 3 units)
    char[] buffer = new char[3];
    int prevSrc;
    long norm32;
    int ccOrQCMask, qcMask;
    int reorderStartIndex, length;
    char c, c2, minNoMaybe;
    int cc, prevCC, trailCC;
    char[] p;
    int pStart;
    int destIndex = destStart;
    int srcIndex = srcStart;
    if(!compat) {
        minNoMaybe=(char)indexes[INDEX_MIN_NFD_NO_MAYBE];
        qcMask=QC_NFD;
    } else {
        minNoMaybe=(char)indexes[INDEX_MIN_NFKD_NO_MAYBE];
        qcMask=QC_NFKD;
    }

    // initialize
    ccOrQCMask=CC_MASK|qcMask;
    reorderStartIndex=0;
    prevCC=0;
    norm32=0;
    c=0;
    pStart=0;

    cc=trailCC=-1;

    for(;;) {
        // fast path: count characters that need neither decomposition nor reordering
        prevSrc=srcIndex;

        while(srcIndex!=srcLimit &&((c=src[srcIndex])<minNoMaybe ||
                                    ((norm32=getNorm32(c))&ccOrQCMask)==0)){
            prevCC=0;
            ++srcIndex;
        }

        // copy the run found by the fast path
        if(srcIndex!=prevSrc) {
            length=(int)(srcIndex-prevSrc);
            if((destIndex+length)<=destLimit) {
                System.arraycopy(src,prevSrc,dest,destIndex,length);
            }

            destIndex+=length;
            reorderStartIndex=destIndex;
        }

        // end of source reached?
        if(srcIndex==srcLimit) {
            break;
        }

        // c already holds src[srcIndex] and norm32 its value
        ++srcIndex;

        // Determine what to write for the current character:
        // p==null means "write c (and c2 if non-zero)";
        // otherwise write length units starting at p[pStart].
        if(isNorm32HangulOrJamo(norm32)) {
            if(nx_contains(nx, c)) {
                // excluded: copy the character unchanged
                c2=0;
                p=null;
                length=1;
            } else {
                // split the syllable arithmetically into its Jamo
                p=buffer;
                pStart=0;
                cc=trailCC=0;

                c-=HANGUL_BASE;

                c2=(char)(c%JAMO_T_COUNT);
                c/=JAMO_T_COUNT;
                if(c2>0) {
                    buffer[2]=(char)(JAMO_T_BASE+c2);
                    length=3;
                } else {
                    length=2;
                }

                buffer[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT);
                buffer[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT);
            }
        } else {
            if(isNorm32Regular(norm32)) {
                c2=0;
                length=1;
            } else {
                // c is a lead surrogate: fetch the value for the pair
                if(srcIndex!=srcLimit &&
                   UTF16.isTrailSurrogate(c2=src[srcIndex])) {
                    ++srcIndex;
                    length=2;
                    norm32=getNorm32FromSurrogatePair(norm32, c2);
                } else {
                    // unpaired lead surrogate: treat as inert
                    c2=0;
                    length=1;
                    norm32=0;
                }
            }

            if(nx_contains(nx, c, c2)) {
                // excluded: copy unchanged and treat as cc==0
                cc=trailCC=0;
                p=null;
            } else if((norm32&qcMask)==0) {
                // no decomposition in this form: only the combining class matters
                cc=trailCC=(int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT));
                p=null;
                pStart=-1;
            } else {
                DecomposeArgs arg = new DecomposeArgs();

                // look up the stored decomposition
                pStart=decompose(norm32, qcMask, arg);
                p=extraData;
                length=arg.length;
                cc=arg.cc;
                trailCC=arg.trailCC;
                if(length==1) {
                    // single-unit decomposition: handle it like a plain character
                    c=p[pStart];
                    c2=0;
                    p=null;
                    pStart=-1;
                }
            }
        }

        // append the result, reordering if its leading cc is lower than the previous trailing cc
        if((destIndex+length)<=destLimit) {
            int reorderSplit=destIndex;
            if(p==null) {
                // single character (c, c2)
                if(cc!=0 && cc<prevCC) {
                    // out of order: insert it further back
                    destIndex+=length;
                    trailCC=insertOrdered(dest,reorderStartIndex,
                                          reorderSplit, destIndex, c, c2, cc);
                } else {
                    // in order: append
                    dest[destIndex++]=c;
                    if(c2!=0) {
                        dest[destIndex++]=c2;
                    }
                }
            } else {
                // multi-unit decomposition at p[pStart]
                if(cc!=0 && cc<prevCC) {
                    // out of order: merge it into the preceding text
                    destIndex+=length;
                    trailCC=mergeOrdered(dest,reorderStartIndex,
                                         reorderSplit,p, pStart,pStart+length);
                } else {
                    // in order: append
                    do {
                        dest[destIndex++]=p[pStart++];
                    } while(--length>0);
                }
            }
        } else {
            // destination is full: keep counting only
            destIndex+=length;
        }

        prevCC=trailCC;
        if(prevCC==0) {
            // later reordering never needs to look in front of this point
            reorderStartIndex=destIndex;
        }
    }

    outTrailCC[0]=prevCC;

    return destIndex - destStart;
}
1137
1138
1139 private static final class NextCombiningArgs{
1140 char[] source;
1141 int start;
1142
1143 char c;
1144 char c2;
1145 int combiningIndex;
1146 char cc;
1147 }
1148
1149
1150 private static int getNextCombining(NextCombiningArgs args,
1151 int limit,
1152 UnicodeSet nx) {
1153 long norm32;
1154 int combineFlags;
1155
1156 args.c=args.source[args.start++];
1157 norm32=getNorm32(args.c);
1158
1159
1160 args.c2=0;
1161 args.combiningIndex=0;
1162 args.cc=0;
1163
1164 if((norm32&(CC_MASK|COMBINES_ANY))==0) {
1165 return 0;
1166 } else {
1167 if(isNorm32Regular(norm32)) {
1168
1169 } else if(isNorm32HangulOrJamo(norm32)) {
1170
1171 args.combiningIndex=(int)((UNSIGNED_INT_MASK)&(0xfff0|
1172 (norm32>>EXTRA_SHIFT)));
1173 return (int)(norm32&COMBINES_ANY);
1174 } else {
1175
1176 if(args.start!=limit && UTF16.isTrailSurrogate(args.c2=
1177 args.source[args.start])) {
1178 ++args.start;
1179 norm32=getNorm32FromSurrogatePair(norm32, args.c2);
1180 } else {
1181 args.c2=0;
1182 return 0;
1183 }
1184 }
1185
1186 if(nx_contains(nx, args.c, args.c2)) {
1187 return 0;
1188 }
1189
1190 args.cc= (char)((norm32>>CC_SHIFT)&0xff);
1191
1192 combineFlags=(int)(norm32&COMBINES_ANY);
1193 if(combineFlags!=0) {
1194 int index = getExtraDataIndex(norm32);
1195 args.combiningIndex=index>0 ? extraData[(index-1)] :0;
1196 }
1197
1198 return combineFlags;
1199 }
1200 }
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210 private static int getCombiningIndexFromStarter(char c,char c2){
1211 long norm32;
1212
1213 norm32=getNorm32(c);
1214 if(c2!=0) {
1215 norm32=getNorm32FromSurrogatePair(norm32, c2);
1216 }
1217 return extraData[(getExtraDataIndex(norm32)-1)];
1218 }
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237 private static int combine(char[]table,int tableStart,
1238 int combineBackIndex,
1239 int[] outValues) {
1240 int key;
1241 int value,value2;
1242
1243 if(outValues.length<2){
1244 throw new IllegalArgumentException();
1245 }
1246
1247
1248 for(;;) {
1249 key=table[tableStart++];
1250 if(key>=combineBackIndex) {
1251 break;
1252 }
1253 tableStart+= ((table[tableStart]&0x8000) != 0)? 2 : 1;
1254 }
1255
1256
1257 if((key&0x7fff)==combineBackIndex) {
1258
1259 value=table[tableStart];
1260
1261
1262 key=(int)((UNSIGNED_INT_MASK)&((value&0x2000)+1));
1263
1264
1265
1266
1267 if((value&0x8000) != 0) {
1268 if((value&0x4000) != 0) {
1269
1270 value=(int)((UNSIGNED_INT_MASK)&((value&0x3ff)|0xd800));
1271 value2=table[tableStart+1];
1272 } else {
1273
1274 value=table[tableStart+1];
1275 value2=0;
1276 }
1277 } else {
1278
1279 value&=0x1fff;
1280 value2=0;
1281 }
1282 outValues[0]=value;
1283 outValues[1]=value2;
1284 return key;
1285 } else {
1286
1287 return 0;
1288 }
1289 }
1290
1291
1292 private static final class RecomposeArgs{
1293 char[] source;
1294 int start;
1295 int limit;
1296 }
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311 private static char recompose(RecomposeArgs args, int options, UnicodeSet nx) {
1312 int remove, q, r;
1313 int combineFlags;
1314 int combineFwdIndex, combineBackIndex;
1315 int result, value=0, value2=0;
1316 int prevCC;
1317 boolean starterIsSupplementary;
1318 int starter;
1319 int[] outValues = new int[2];
1320 starter=-1;
1321 combineFwdIndex=0;
1322 starterIsSupplementary=false;
1323 prevCC=0;
1324
1325 NextCombiningArgs ncArg = new NextCombiningArgs();
1326 ncArg.source = args.source;
1327
1328 ncArg.cc =0;
1329 ncArg.c2 =0;
1330
1331 for(;;) {
1332 ncArg.start = args.start;
1333 combineFlags=getNextCombining(ncArg,args.limit,nx);
1334 combineBackIndex=ncArg.combiningIndex;
1335 args.start = ncArg.start;
1336
1337 if(((combineFlags&COMBINES_BACK)!=0) && starter!=-1) {
1338 if((combineBackIndex&0x8000)!=0) {
1339
1340
1341
1342
1343 if((options&BEFORE_PRI_29)!=0 || prevCC==0) {
1344 remove=-1;
1345 combineFlags=0;
1346 ncArg.c2=args.source[starter];
1347 if(combineBackIndex==0xfff2) {
1348
1349
1350
1351 ncArg.c2=(char)(ncArg.c2-JAMO_L_BASE);
1352 if(ncArg.c2<JAMO_L_COUNT) {
1353 remove=args.start-1;
1354 ncArg.c=(char)(HANGUL_BASE+(ncArg.c2*JAMO_V_COUNT+
1355 (ncArg.c-JAMO_V_BASE))*JAMO_T_COUNT);
1356 if(args.start!=args.limit &&
1357 (ncArg.c2=(char)(args.source[args.start]
1358 -JAMO_T_BASE))<JAMO_T_COUNT) {
1359 ++args.start;
1360 ncArg.c+=ncArg.c2;
1361 } else {
1362
1363 combineFlags=COMBINES_FWD;
1364 }
1365 if(!nx_contains(nx, ncArg.c)) {
1366 args.source[starter]=ncArg.c;
1367 } else {
1368
1369 if(!isHangulWithoutJamoT(ncArg.c)) {
1370 --args.start;
1371 }
1372
1373 remove=args.start;
1374 }
1375 }
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386 } else {
1387
1388 if(isHangulWithoutJamoT(ncArg.c2)) {
1389 ncArg.c2+=ncArg.c-JAMO_T_BASE;
1390 if(!nx_contains(nx, ncArg.c2)) {
1391 remove=args.start-1;
1392 args.source[starter]=ncArg.c2;
1393 }
1394 }
1395 }
1396
1397 if(remove!=-1) {
1398
1399 q=remove;
1400 r=args.start;
1401 while(r<args.limit) {
1402 args.source[q++]=args.source[r++];
1403 }
1404 args.start=remove;
1405 args.limit=q;
1406 }
1407
1408 ncArg.c2=0;
1409
1410 if(combineFlags!=0) {
1411
1412
1413
1414
1415
1416
1417 if(args.start==args.limit) {
1418 return (char)prevCC;
1419 }
1420
1421
1422 combineFwdIndex=0xfff0;
1423
1424
1425 continue;
1426 }
1427 }
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438 } else if(
1439
1440 !((combineFwdIndex&0x8000)!=0) &&
1441
1442 ((options&BEFORE_PRI_29)!=0 ?
1443 (prevCC!=ncArg.cc || prevCC==0) :
1444 (prevCC<ncArg.cc || prevCC==0)) &&
1445
1446 0!=(result=combine(combiningTable,combineFwdIndex,
1447 combineBackIndex, outValues)) &&
1448
1449 !nx_contains(nx, (char)value, (char)value2)
1450 ) {
1451 value=outValues[0];
1452 value2=outValues[1];
1453
1454
1455
1456 remove= ncArg.c2==0 ? args.start-1 : args.start-2;
1457
1458
1459 args.source[starter]=(char)value;
1460 if(starterIsSupplementary) {
1461 if(value2!=0) {
1462
1463 args.source[starter+1]=(char)value2;
1464 } else {
1465
1466
1467 starterIsSupplementary=false;
1468 q=starter+1;
1469 r=q+1;
1470 while(r<remove) {
1471 args.source[q++]=args.source[r++];
1472 }
1473 --remove;
1474 }
1475 } else if(value2!=0) {
1476 starterIsSupplementary=true;
1477 args.source[starter+1]=(char)value2;
1478
1479 }
1480
1481
1482
1483 if(remove<args.start) {
1484 q=remove;
1485 r=args.start;
1486 while(r<args.limit) {
1487 args.source[q++]=args.source[r++];
1488 }
1489 args.start=remove;
1490 args.limit=q;
1491 }
1492
1493
1494
1495
1496 if(args.start==args.limit) {
1497 return (char)prevCC;
1498 }
1499
1500
1501 if(result>1) {
1502 combineFwdIndex=getCombiningIndexFromStarter((char)value,
1503 (char)value2);
1504 } else {
1505 starter=-1;
1506 }
1507
1508
1509 continue;
1510 }
1511 }
1512
1513
1514 prevCC=ncArg.cc;
1515 if(args.start==args.limit) {
1516 return (char)prevCC;
1517 }
1518
1519
1520 if(ncArg.cc==0) {
1521
1522 if((combineFlags&COMBINES_FWD)!=0) {
1523
1524 if(ncArg.c2==0) {
1525 starterIsSupplementary=false;
1526 starter=args.start-1;
1527 } else {
1528 starterIsSupplementary=false;
1529 starter=args.start-2;
1530 }
1531 combineFwdIndex=combineBackIndex;
1532 } else {
1533
1534 starter=-1;
1535 }
1536 } else if((options&OPTIONS_COMPOSE_CONTIGUOUS)!=0) {
1537
1538 starter=-1;
1539 }
1540 }
1541 }
1542
1543
1544
1545 private static int findPreviousStarter(char[]src, int srcStart, int current,
1546 int ccOrQCMask,
1547 int decompQCMask,
1548 char minNoMaybe) {
1549 long norm32;
1550 PrevArgs args = new PrevArgs();
1551 args.src = src;
1552 args.start = srcStart;
1553 args.current = current;
1554
1555 while(args.start<args.current) {
1556 norm32= getPrevNorm32(args, minNoMaybe, ccOrQCMask|decompQCMask);
1557 if(isTrueStarter(norm32, ccOrQCMask, decompQCMask)) {
1558 break;
1559 }
1560 }
1561 return args.current;
1562 }
1563
1564
1565
1566
1567 private static int findNextStarter(char[] src,int start,int limit,
1568 int qcMask,
1569 int decompQCMask,
1570 char minNoMaybe) {
1571 int p;
1572 long norm32;
1573 int ccOrQCMask;
1574 char c, c2;
1575
1576 ccOrQCMask=CC_MASK|qcMask;
1577
1578 DecomposeArgs decompArgs = new DecomposeArgs();
1579
1580 for(;;) {
1581 if(start==limit) {
1582 break;
1583 }
1584 c=src[start];
1585 if(c<minNoMaybe) {
1586 break;
1587 }
1588
1589 norm32=getNorm32(c);
1590 if((norm32&ccOrQCMask)==0) {
1591 break;
1592 }
1593
1594 if(isNorm32LeadSurrogate(norm32)) {
1595
1596 if((start+1)==limit ||
1597 !UTF16.isTrailSurrogate(c2=(src[start+1]))){
1598
1599 break;
1600 }
1601 norm32=getNorm32FromSurrogatePair(norm32, c2);
1602
1603 if((norm32&ccOrQCMask)==0) {
1604 break;
1605 }
1606 } else {
1607 c2=0;
1608 }
1609
1610
1611 if((norm32&decompQCMask)!=0) {
1612
1613
1614 p=decompose(norm32, decompQCMask, decompArgs);
1615
1616
1617
1618 if(decompArgs.cc==0 && (getNorm32(extraData,p, qcMask)&qcMask)==0) {
1619 break;
1620 }
1621 }
1622
1623 start+= c2==0 ? 1 : 2;
1624 }
1625
1626 return start;
1627 }
1628
1629
1630 private static final class ComposePartArgs{
1631 int prevCC;
1632 int length;
1633 }
1634
1635
1636 private static char[] composePart(ComposePartArgs args,
1637 int prevStarter,
1638 char[] src, int start, int limit,
1639 int options,
1640 UnicodeSet nx) {
1641 int recomposeLimit;
1642 boolean compat =((options&OPTIONS_COMPAT)!=0);
1643
1644
1645 int[] outTrailCC = new int[1];
1646 char[] buffer = new char[(limit-prevStarter)*MAX_BUFFER_SIZE];
1647
1648 for(;;){
1649 args.length=decompose(src,prevStarter,(start),
1650 buffer,0,buffer.length,
1651 compat,outTrailCC,nx);
1652 if(args.length<=buffer.length){
1653 break;
1654 }else{
1655 buffer = new char[args.length];
1656 }
1657 }
1658
1659
1660 recomposeLimit=args.length;
1661
1662 if(args.length>=2) {
1663 RecomposeArgs rcArgs = new RecomposeArgs();
1664 rcArgs.source = buffer;
1665 rcArgs.start = 0;
1666 rcArgs.limit = recomposeLimit;
1667 args.prevCC=recompose(rcArgs, options, nx);
1668 recomposeLimit = rcArgs.limit;
1669 }
1670
1671
1672 args.length=recomposeLimit;
1673 return buffer;
1674 }
1675
/**
 * Tries to compose a Jamo V or Jamo T ({@code c}) with the preceding
 * character ({@code prev}) into a Hangul syllable.
 *
 * @param prev     the character before {@code c}
 * @param c        the Jamo V or Jamo T being examined
 * @param norm32   norm32 value of {@code c}; selects the V or T branch
 * @param src      source text, consulted for a following Jamo T
 * @param srcIndex element 0 holds the index after {@code c}; it is advanced
 *                 when a following Jamo T is consumed
 * @param limit    index just past the end of the source text
 * @param compat   when true, a following character whose NFKD decomposition
 *                 is a single Jamo T is consumed as well
 * @param dest     destination buffer
 * @param destIndex index in {@code dest} that receives the syllable
 * @param nx       set of excluded characters
 * @return true if a syllable was written to {@code dest[destIndex]}
 */
private static boolean composeHangul(char prev, char c,
                                     long norm32,
                                     char[] src,int[] srcIndex, int limit,
                                     boolean compat,
                                     char[] dest,int destIndex,
                                     UnicodeSet nx) {
    int start=srcIndex[0];
    if(isJamoVTNorm32JamoV(norm32)) {
        // c is a Jamo V: compose with a preceding Jamo L
        // and, if present, a following Jamo T
        prev=(char)(prev-JAMO_L_BASE);
        if(prev<JAMO_L_COUNT) {
            c=(char)(HANGUL_BASE+(prev*JAMO_V_COUNT+
                                  (c-JAMO_V_BASE))*JAMO_T_COUNT);

            // check for a Jamo T following the Jamo V
            if(start!=limit) {
                char next, t;

                next=src[start];
                if((t=(char)(next-JAMO_T_BASE))<JAMO_T_COUNT) {
                    // plain Jamo T
                    ++start;
                    c+=t;
                } else if(compat) {
                    // a character whose compatibility decomposition is a single Jamo T
                    norm32=getNorm32(next);
                    if(isNorm32Regular(norm32) && ((norm32&QC_NFKD)!=0)) {
                        int p ;
                        DecomposeArgs dcArgs = new DecomposeArgs();
                        p=decompose(norm32, QC_NFKD, dcArgs);
                        if(dcArgs.length==1 &&
                           (t=(char)(extraData[p]-JAMO_T_BASE))
                           <JAMO_T_COUNT) {
                            // compatibility Jamo T
                            ++start;
                            c+=t;
                        }
                    }
                }
            }
            if(nx_contains(nx, c)) {
                // the syllable is excluded
                // NOTE(review): start is a local and srcIndex[0] is not written on
                // this path, so this decrement has no visible effect
                if(!isHangulWithoutJamoT(c)) {
                    --start;
                }
                return false;
            }
            dest[destIndex]=c;
            srcIndex[0]=start;
            return true;
        }
    } else if(isHangulWithoutJamoT(prev)) {
        // c is a Jamo T: compose with a preceding syllable that has no Jamo T yet
        c=(char)(prev+(c-JAMO_T_BASE));
        if(nx_contains(nx, c)) {
            return false;
        }
        dest[destIndex]=c;
        srcIndex[0]=start;
        return true;
    }
    return false;
}
1742
1743
1744
1745
1746
1747
1748 public static int compose(char[] src, int srcStart, int srcLimit,
1749 char[] dest,int destStart,int destLimit,
1750 int options,UnicodeSet nx) {
1751
1752 int prevSrc, prevStarter;
1753 long norm32;
1754 int ccOrQCMask, qcMask;
1755 int reorderStartIndex, length;
1756 char c, c2, minNoMaybe;
1757 int cc, prevCC;
1758 int[] ioIndex = new int[1];
1759 int destIndex = destStart;
1760 int srcIndex = srcStart;
1761
1762 if((options&OPTIONS_COMPAT)!=0) {
1763 minNoMaybe=(char)indexes[INDEX_MIN_NFKC_NO_MAYBE];
1764 qcMask=QC_NFKC;
1765 } else {
1766 minNoMaybe=(char)indexes[INDEX_MIN_NFC_NO_MAYBE];
1767 qcMask=QC_NFC;
1768 }
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793 prevStarter=srcIndex;
1794
1795 ccOrQCMask=CC_MASK|qcMask;
1796 reorderStartIndex=0;
1797 prevCC=0;
1798
1799
1800 norm32=0;
1801 c=0;
1802
1803 for(;;) {
1804
1805
1806 prevSrc=srcIndex;
1807
1808 while(srcIndex!=srcLimit && ((c=src[srcIndex])<minNoMaybe ||
1809 ((norm32=getNorm32(c))&ccOrQCMask)==0)) {
1810 prevCC=0;
1811 ++srcIndex;
1812 }
1813
1814
1815
1816 if(srcIndex!=prevSrc) {
1817 length=(int)(srcIndex-prevSrc);
1818 if((destIndex+length)<=destLimit) {
1819 System.arraycopy(src,prevSrc,dest,destIndex,length);
1820 }
1821 destIndex+=length;
1822 reorderStartIndex=destIndex;
1823
1824
1825
1826 prevStarter=srcIndex-1;
1827 if(UTF16.isTrailSurrogate(src[prevStarter]) &&
1828 prevSrc<prevStarter &&
1829 UTF16.isLeadSurrogate(src[(prevStarter-1)])) {
1830 --prevStarter;
1831 }
1832
1833 prevSrc=srcIndex;
1834 }
1835
1836
1837 if(srcIndex==srcLimit) {
1838 break;
1839 }
1840
1841
1842 ++srcIndex;
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874 if(isNorm32HangulOrJamo(norm32)) {
1875
1876
1877
1878
1879
1880
1881 prevCC=cc=0;
1882 reorderStartIndex=destIndex;
1883 ioIndex[0]=srcIndex;
1884 if(
1885 destIndex>0 &&
1886 composeHangul(src[(prevSrc-1)], c, norm32,src, ioIndex,
1887 srcLimit, (options&OPTIONS_COMPAT)!=0, dest,
1888 destIndex<=destLimit ? destIndex-1: 0,
1889 nx)
1890 ) {
1891 srcIndex=ioIndex[0];
1892 prevStarter=srcIndex;
1893 continue;
1894 }
1895
1896 srcIndex = ioIndex[0];
1897
1898
1899
1900 c2=0;
1901 length=1;
1902 prevStarter=prevSrc;
1903 } else {
1904 if(isNorm32Regular(norm32)) {
1905 c2=0;
1906 length=1;
1907 } else {
1908
1909 if(srcIndex!=srcLimit &&
1910 UTF16.isTrailSurrogate(c2=src[srcIndex])) {
1911 ++srcIndex;
1912 length=2;
1913 norm32=getNorm32FromSurrogatePair(norm32, c2);
1914 } else {
1915
1916 c2=0;
1917 length=1;
1918 norm32=0;
1919 }
1920 }
1921 ComposePartArgs args =new ComposePartArgs();
1922
1923
1924 if(nx_contains(nx, c, c2)) {
1925
1926 cc=0;
1927 } else if((norm32&qcMask)==0) {
1928 cc=(int)((UNSIGNED_BYTE_MASK)&(norm32>>CC_SHIFT));
1929 } else {
1930 char[] p;
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945 int decompQCMask=(qcMask<<2)&0xf;
1946
1947
1948
1949
1950
1951 if(isTrueStarter(norm32, CC_MASK|qcMask, decompQCMask)) {
1952 prevStarter=prevSrc;
1953 } else {
1954
1955 destIndex-=prevSrc-prevStarter;
1956 }
1957
1958
1959 srcIndex=findNextStarter(src, srcIndex,srcLimit, qcMask,
1960 decompQCMask, minNoMaybe);
1961
1962 args.prevCC = prevCC;
1963
1964 args.length = length;
1965 p=composePart(args,prevStarter,src,srcIndex,srcLimit,options,nx);
1966
1967 if(p==null) {
1968
1969 break;
1970 }
1971
1972 prevCC = args.prevCC;
1973 length = args.length;
1974
1975
1976
1977 if((destIndex+args.length)<=destLimit) {
1978 int i=0;
1979 while(i<args.length) {
1980 dest[destIndex++]=p[i++];
1981 --length;
1982 }
1983 } else {
1984
1985
1986 destIndex+=length;
1987 }
1988
1989 prevStarter=srcIndex;
1990 continue;
1991 }
1992 }
1993
1994
1995 if((destIndex+length)<=destLimit) {
1996 if(cc!=0 && cc<prevCC) {
1997
1998
1999 int reorderSplit= destIndex;
2000 destIndex+=length;
2001 prevCC=insertOrdered(dest,reorderStartIndex, reorderSplit,
2002 destIndex, c, c2, cc);
2003 } else {
2004
2005 dest[destIndex++]=c;
2006 if(c2!=0) {
2007 dest[destIndex++]=c2;
2008 }
2009 prevCC=cc;
2010 }
2011 } else {
2012
2013
2014 destIndex+=length;
2015 prevCC=cc;
2016 }
2017 }
2018
2019 return destIndex - destStart;
2020 }
2021
2022 public static int getCombiningClass(int c) {
2023 long norm32;
2024 norm32=getNorm32(c);
2025 return (char)((norm32>>CC_SHIFT)&0xFF);
2026 }
2027
2028 public static boolean isFullCompositionExclusion(int c) {
2029 if(isFormatVersion_2_1) {
2030 int aux =AuxTrieImpl.auxTrie.getCodePointValue(c);
2031 return (boolean)((aux & AUX_COMP_EX_MASK)!=0);
2032 } else {
2033 return false;
2034 }
2035 }
2036
2037 public static boolean isCanonSafeStart(int c) {
2038 if(isFormatVersion_2_1) {
2039 int aux = AuxTrieImpl.auxTrie.getCodePointValue(c);
2040 return (boolean)((aux & AUX_UNSAFE_MASK)==0);
2041 } else {
2042 return false;
2043 }
2044 }
2045
2046
2047 public static boolean isNFSkippable(int c, NormalizerBase.Mode mode, long mask) {
2048 long norm32;
2049 mask = mask & UNSIGNED_INT_MASK;
2050 char aux;
2051
2052
2053 norm32 = getNorm32(c);
2054
2055 if((norm32&mask)!=0) {
2056 return false;
2057 }
2058
2059 if(mode == NormalizerBase.NFD || mode == NormalizerBase.NFKD || mode == NormalizerBase.NONE){
2060 return true;
2061 }
2062
2063
2064
2065 if((norm32& QC_NFD)==0) {
2066 return true;
2067 }
2068
2069
2070 if(isNorm32HangulOrJamo(norm32)) {
2071
2072 return !isHangulWithoutJamoT((char)c);
2073 }
2074
2075
2076
2077 if(!isFormatVersion_2_2) {
2078 return false;
2079 }
2080
2081
2082 aux = AuxTrieImpl.auxTrie.getCodePointValue(c);
2083 return (aux&AUX_NFC_SKIP_F_MASK)==0;
2084
2085
2086 }
2087
2088 public static UnicodeSet addPropertyStarts(UnicodeSet set) {
2089 int c;
2090
2091
2092
2093 TrieIterator normIter = new TrieIterator(NormTrieImpl.normTrie);
2094 RangeValueIterator.Element normResult = new RangeValueIterator.Element();
2095
2096 while(normIter.next(normResult)){
2097 set.add(normResult.start);
2098 }
2099
2100
2101 TrieIterator fcdIter = new TrieIterator(FCDTrieImpl.fcdTrie);
2102 RangeValueIterator.Element fcdResult = new RangeValueIterator.Element();
2103
2104 while(fcdIter.next(fcdResult)){
2105 set.add(fcdResult.start);
2106 }
2107
2108 if(isFormatVersion_2_1){
2109
2110 TrieIterator auxIter = new TrieIterator(AuxTrieImpl.auxTrie);
2111 RangeValueIterator.Element auxResult = new RangeValueIterator.Element();
2112 while(auxIter.next(auxResult)){
2113 set.add(auxResult.start);
2114 }
2115 }
2116
2117 for(c=HANGUL_BASE; c<HANGUL_BASE+HANGUL_COUNT; c+=JAMO_T_COUNT) {
2118 set.add(c);
2119 set.add(c+1);
2120 }
2121 set.add(HANGUL_BASE+HANGUL_COUNT);
2122 return set;
2123 }
2124
2125
2126
2127
2128
2129
2130
2131
2132 public static final int quickCheck(int c, int modeValue) {
2133 final int qcMask[]={
2134 0, 0, QC_NFD, QC_NFKD, QC_NFC, QC_NFKC
2135 };
2136
2137 int norm32=(int)getNorm32(c)&qcMask[modeValue];
2138
2139 if(norm32==0) {
2140 return 1;
2141 } else if((norm32&QC_ANY_NO)!=0) {
2142 return 0;
2143 } else {
2144 return 2;
2145 }
2146 }
2147
2148 private static int strCompare(char[] s1, int s1Start, int s1Limit,
2149 char[] s2, int s2Start, int s2Limit,
2150 boolean codePointOrder) {
2151
2152 int start1, start2, limit1, limit2;
2153
2154 char c1, c2;
2155
2156
2157 start1=s1Start;
2158 start2=s2Start;
2159
2160 int length1, length2;
2161
2162 length1 = s1Limit - s1Start;
2163 length2 = s2Limit - s2Start;
2164
2165 int lengthResult;
2166
2167 if(length1<length2) {
2168 lengthResult=-1;
2169 limit1=start1+length1;
2170 } else if(length1==length2) {
2171 lengthResult=0;
2172 limit1=start1+length1;
2173 } else {
2174 lengthResult=1;
2175 limit1=start1+length2;
2176 }
2177
2178 if(s1==s2) {
2179 return lengthResult;
2180 }
2181
2182 for(;;) {
2183
2184 if(s1Start==limit1) {
2185 return lengthResult;
2186 }
2187
2188 c1=s1[s1Start];
2189 c2=s2[s2Start];
2190 if(c1!=c2) {
2191 break;
2192 }
2193 ++s1Start;
2194 ++s2Start;
2195 }
2196
2197
2198 limit1=start1+length1;
2199 limit2=start2+length2;
2200
2201
2202
2203 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
2204
2205
2206 if(
2207 ( c1<=0xdbff && (s1Start+1)!=limit1 &&
2208 UTF16.isTrailSurrogate(s1[(s1Start+1)])
2209 ) ||
2210 ( UTF16.isTrailSurrogate(c1) && start1!=s1Start &&
2211 UTF16.isLeadSurrogate(s1[(s1Start-1)])
2212 )
2213 ) {
2214
2215 } else {
2216
2217 c1-=0x2800;
2218 }
2219
2220 if(
2221 ( c2<=0xdbff && (s2Start+1)!=limit2 &&
2222 UTF16.isTrailSurrogate(s2[(s2Start+1)])
2223 ) ||
2224 ( UTF16.isTrailSurrogate(c2) && start2!=s2Start &&
2225 UTF16.isLeadSurrogate(s2[(s2Start-1)])
2226 )
2227 ) {
2228
2229 } else {
2230
2231 c2-=0x2800;
2232 }
2233 }
2234
2235
2236 return (int)c1-(int)c2;
2237 }
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
/** Option bits that select the built-in exclusion sets (NX_HANGUL, NX_CJK_COMPAT); tested in internalGetNX(). */
private static final int OPTIONS_NX_MASK=0x1f;
/** Option bits that select a Unicode-version exclusion set; applied in internalGetNXUnicode(). */
private static final int OPTIONS_UNICODE_MASK=0xe0;
/** All option bits that select exclusion sets; getNX() returns null when none of them is set. */
public static final int OPTIONS_SETS_MASK=0xff;

/**
 * Cache of exclusion sets, indexed by the option bits masked with
 * OPTIONS_SETS_MASK. Entries are created on demand by the synchronized
 * internalGetNX*() methods.
 */
private static final UnicodeSet[] nxCache = new UnicodeSet[OPTIONS_SETS_MASK+1];

/** Option bit / cache index for the set U+AC00..U+D7A3 built by internalGetNXHangul(). */
private static final int NX_HANGUL = 1;

/**
 * Option bit / cache index for the set built by internalGetNXCJKCompat():
 * code points matching [:Ideographic:] whose data has the QC_NFD bit set.
 */
private static final int NX_CJK_COMPAT=2;

/*
 * NOTE(review): not referenced in the visible part of this file. Presumably
 * selects the composition behaviour from before Unicode Public Review
 * Issue #29 - confirm against recompose().
 */
public static final int BEFORE_PRI_29=0x100;

/** Option bit: compose()/composePart() use the compatibility (NFKC/NFKD) data when it is set. */
public static final int OPTIONS_COMPAT=0x1000;

/*
 * NOTE(review): not referenced in the visible part of this file. Presumably
 * restricts composition to contiguous characters - confirm against recompose().
 */
public static final int OPTIONS_COMPOSE_CONTIGUOUS=0x2000;
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333 private static final synchronized UnicodeSet internalGetNXHangul() {
2334
2335
2336 if(nxCache[NX_HANGUL]==null) {
2337 nxCache[NX_HANGUL]=new UnicodeSet(0xac00, 0xd7a3);
2338 }
2339 return nxCache[NX_HANGUL];
2340 }
2341
2342 private static final synchronized UnicodeSet internalGetNXCJKCompat() {
2343
2344
2345 if(nxCache[NX_CJK_COMPAT]==null) {
2346
2347
2348 UnicodeSet set, hasDecomp;
2349
2350 set=new UnicodeSet("[:Ideographic:]");
2351
2352
2353 hasDecomp=new UnicodeSet();
2354
2355
2356 UnicodeSetIterator it = new UnicodeSetIterator(set);
2357 int start, end;
2358 long norm32;
2359
2360 while(it.nextRange() && (it.codepoint != UnicodeSetIterator.IS_STRING)) {
2361 start=it.codepoint;
2362 end=it.codepointEnd;
2363 while(start<=end) {
2364 norm32 = getNorm32(start);
2365 if((norm32 & QC_NFD)>0) {
2366 hasDecomp.add(start);
2367 }
2368 ++start;
2369 }
2370 }
2371
2372
2373 nxCache[NX_CJK_COMPAT]=hasDecomp;
2374
2375 }
2376
2377 return nxCache[NX_CJK_COMPAT];
2378 }
2379
2380 private static final synchronized UnicodeSet internalGetNXUnicode(int options) {
2381 options &= OPTIONS_UNICODE_MASK;
2382 if(options==0) {
2383 return null;
2384 }
2385
2386 if(nxCache[options]==null) {
2387
2388 UnicodeSet set = new UnicodeSet();
2389
2390 switch(options) {
2391 case NormalizerBase.UNICODE_3_2:
2392 set.applyPattern("[:^Age=3.2:]");
2393 break;
2394 default:
2395 return null;
2396 }
2397
2398 nxCache[options]=set;
2399 }
2400
2401 return nxCache[options];
2402 }
2403
2404
2405 private static final synchronized UnicodeSet internalGetNX(int options) {
2406 options&=OPTIONS_SETS_MASK;
2407
2408 if(nxCache[options]==null) {
2409
2410 if(options==NX_HANGUL) {
2411 return internalGetNXHangul();
2412 }
2413 if(options==NX_CJK_COMPAT) {
2414 return internalGetNXCJKCompat();
2415 }
2416 if((options & OPTIONS_UNICODE_MASK)!=0 && (options & OPTIONS_NX_MASK)==0) {
2417 return internalGetNXUnicode(options);
2418 }
2419
2420
2421 UnicodeSet set;
2422 UnicodeSet other;
2423
2424 set=new UnicodeSet();
2425
2426
2427 if((options & NX_HANGUL)!=0 && null!=(other=internalGetNXHangul())) {
2428 set.addAll(other);
2429 }
2430 if((options&NX_CJK_COMPAT)!=0 && null!=(other=internalGetNXCJKCompat())) {
2431 set.addAll(other);
2432 }
2433 if((options&OPTIONS_UNICODE_MASK)!=0 && null!=(other=internalGetNXUnicode(options))) {
2434 set.addAll(other);
2435 }
2436
2437 nxCache[options]=set;
2438 }
2439 return nxCache[options];
2440 }
2441
2442 public static final UnicodeSet getNX(int options) {
2443 if((options&=OPTIONS_SETS_MASK)==0) {
2444
2445 return null;
2446 } else {
2447 return internalGetNX(options);
2448 }
2449 }
2450
2451 private static final boolean nx_contains(UnicodeSet nx, int c) {
2452 return nx!=null && nx.contains(c);
2453 }
2454
2455 private static final boolean nx_contains(UnicodeSet nx, char c, char c2) {
2456 return nx!=null && nx.contains(c2==0 ? c : UCharacterProperty.getRawSupplementary(c, c2));
2457 }
2458
2459
2460
2461
2462
2463
2464
2465
2466 public static int getDecompose(int chars[], String decomps[]) {
2467 DecomposeArgs args = new DecomposeArgs();
2468 int length=0;
2469 long norm32 = 0;
2470 int ch = -1;
2471 int index = 0;
2472 int i = 0;
2473
2474 while (++ch < 0x2fa1e) {
2475
2476
2477 if (ch == 0x30ff)
2478 ch = 0xf900;
2479 else if (ch == 0x10000)
2480 ch = 0x1d15e;
2481 else if (ch == 0x1d1c1)
2482 ch = 0x2f800;
2483
2484 norm32 = NormalizerImpl.getNorm32(ch);
2485 if((norm32 & QC_NFD)!=0 && i < chars.length) {
2486 chars[i] = ch;
2487 index = decompose(norm32, args);
2488 decomps[i++] = new String(extraData,index, args.length);
2489 }
2490 }
2491 return i;
2492 }
2493
2494
2495
2496
2497 private static boolean needSingleQuotation(char c) {
2498 return (c >= 0x0009 && c <= 0x000D) ||
2499 (c >= 0x0020 && c <= 0x002F) ||
2500 (c >= 0x003A && c <= 0x0040) ||
2501 (c >= 0x005B && c <= 0x0060) ||
2502 (c >= 0x007B && c <= 0x007E);
2503 }
2504
2505 public static String canonicalDecomposeWithSingleQuotation(String string) {
2506 char[] src = string.toCharArray();
2507 int srcIndex = 0;
2508 int srcLimit = src.length;
2509 char[] dest = new char[src.length * 3];
2510 int destIndex = 0;
2511 int destLimit = dest.length;
2512
2513 char[] buffer = new char[3];
2514 int prevSrc;
2515 long norm32;
2516 int ccOrQCMask;
2517 int qcMask = QC_NFD;
2518 int reorderStartIndex, length;
2519 char c, c2;
2520 char minNoMaybe = (char)indexes[INDEX_MIN_NFD_NO_MAYBE];
2521 int cc, prevCC, trailCC;
2522 char[] p;
2523 int pStart;
2524
2525
2526
2527 ccOrQCMask = CC_MASK | qcMask;
2528 reorderStartIndex = 0;
2529 prevCC = 0;
2530 norm32 = 0;
2531 c = 0;
2532 pStart = 0;
2533
2534 cc = trailCC = -1;
2535 for(;;) {
2536 prevSrc=srcIndex;
2537
2538 while (srcIndex != srcLimit &&
2539 (( c = src[srcIndex]) < minNoMaybe ||
2540 ((norm32 = getNorm32(c)) & ccOrQCMask) == 0 ||
2541 ( c >= '\uac00' && c <= '\ud7a3'))){
2542
2543 prevCC = 0;
2544 ++srcIndex;
2545 }
2546
2547
2548 if (srcIndex != prevSrc) {
2549 length = (int)(srcIndex - prevSrc);
2550 if ((destIndex + length) <= destLimit) {
2551 System.arraycopy(src,prevSrc,dest,destIndex,length);
2552 }
2553
2554 destIndex += length;
2555 reorderStartIndex = destIndex;
2556 }
2557
2558
2559 if(srcIndex == srcLimit) {
2560 break;
2561 }
2562
2563 ++srcIndex;
2564
2565 if(isNorm32Regular(norm32)) {
2566 c2 = 0;
2567 length = 1;
2568 } else {
2569
2570 if(srcIndex != srcLimit &&
2571 Character.isLowSurrogate(c2 = src[srcIndex])) {
2572 ++srcIndex;
2573 length = 2;
2574 norm32 = getNorm32FromSurrogatePair(norm32, c2);
2575 } else {
2576 c2 = 0;
2577 length = 1;
2578 norm32 = 0;
2579 }
2580 }
2581
2582
2583 if((norm32 & qcMask) == 0) {
2584
2585 cc = trailCC = (int)((UNSIGNED_BYTE_MASK) & (norm32 >> CC_SHIFT));
2586 p = null;
2587 pStart = -1;
2588 } else {
2589 DecomposeArgs arg = new DecomposeArgs();
2590
2591
2592 pStart = decompose(norm32, qcMask, arg);
2593 p = extraData;
2594 length = arg.length;
2595 cc = arg.cc;
2596 trailCC = arg.trailCC;
2597 if(length == 1) {
2598
2599 c = p[pStart];
2600 c2 = 0;
2601 p = null;
2602 pStart = -1;
2603 }
2604 }
2605
2606 if((destIndex + length * 3) >= destLimit) {
2607
2608 char[] tmpBuf = new char[destLimit * 2];
2609 System.arraycopy(dest, 0, tmpBuf, 0, destIndex);
2610 dest = tmpBuf;
2611 destLimit = dest.length;
2612 }
2613
2614 {
2615 int reorderSplit = destIndex;
2616 if(p == null) {
2617
2618 if (needSingleQuotation(c)) {
2619
2620
2621 dest[destIndex++] = '\'';
2622 dest[destIndex++] = c;
2623 dest[destIndex++] = '\'';
2624 trailCC = 0;
2625 } else if(cc != 0 && cc < prevCC) {
2626
2627
2628 destIndex += length;
2629 trailCC = insertOrdered(dest,reorderStartIndex,
2630 reorderSplit, destIndex, c, c2, cc);
2631 } else {
2632
2633 dest[destIndex++] = c;
2634 if(c2 != 0) {
2635 dest[destIndex++] = c2;
2636 }
2637 }
2638 } else {
2639
2640
2641 if (needSingleQuotation(p[pStart])) {
2642 dest[destIndex++] = '\'';
2643 dest[destIndex++] = p[pStart++];
2644 dest[destIndex++] = '\'';
2645 length--;
2646 do {
2647 dest[destIndex++] = p[pStart++];
2648 } while(--length > 0);
2649 } else
2650 if(cc != 0 && cc < prevCC) {
2651 destIndex += length;
2652 trailCC = mergeOrdered(dest,reorderStartIndex,
2653 reorderSplit,p, pStart,pStart+length);
2654 } else {
2655
2656 do {
2657 dest[destIndex++] = p[pStart++];
2658 } while(--length > 0);
2659 }
2660 }
2661 }
2662 prevCC = trailCC;
2663 if(prevCC == 0) {
2664 reorderStartIndex = destIndex;
2665 }
2666 }
2667 return new String(dest, 0, destIndex);
2668 }
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
/*
 * NOTE(review): option bit that is not referenced in the visible part of this
 * file. Judging by the name it switches off the Corrigendum 4 replacements
 * done by convert() - confirm at the call sites.
 */
public static final int WITHOUT_CORRIGENDUM4_CORRECTIONS=0x40000;

/*
 * Replacement text used by convert(), as UTF-16 code units. Row i replaces the
 * i-th of the code points U+2F868, U+2F874, U+2F91F, U+2F95F, U+2F9BF.
 * Row 0 is a surrogate pair (U+2136A); the other rows are single BMP chars.
 */
private static final char[][] corrigendum4MappingTable = {
    {'\uD844', '\uDF6A'},
    {'\u5F33'},
    {'\u43AB'},
    {'\u7AAE'},
    {'\u4D57'}};
2697
2698
2699
2700
2701
2702 public static String convert(String str) {
2703 if (str == null) {
2704 return null;
2705 }
2706
2707 int ch = UCharacterIterator.DONE;
2708 StringBuffer dest = new StringBuffer();
2709 UCharacterIterator iter = UCharacterIterator.getInstance(str);
2710
2711 while ((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
2712 switch (ch) {
2713 case 0x2F868:
2714 dest.append(corrigendum4MappingTable[0]);
2715 break;
2716 case 0x2F874:
2717 dest.append(corrigendum4MappingTable[1]);
2718 break;
2719 case 0x2F91F:
2720 dest.append(corrigendum4MappingTable[2]);
2721 break;
2722 case 0x2F95F:
2723 dest.append(corrigendum4MappingTable[3]);
2724 break;
2725 case 0x2F9BF:
2726 dest.append(corrigendum4MappingTable[4]);
2727 break;
2728 default:
2729 UTF16.append(dest,ch);
2730 break;
2731 }
2732 }
2733
2734 return dest.toString();
2735 }
2736 }